﻿#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""金山词霸生词本转换程序

Converts Kingsoft PowerWord (金山词霸) vocabulary-book files between the
old and the new file format.

@version: 0.1
@author: U{Liu Qing}
"""
__author__ =  'Liu Qing'
__version__=  '0.1'
__nonsense__ = ''

import os, sys, getopt, codecs, string

import ConfigParser

def writeConfig(filename):
    """Create (or overwrite) the configuration file with the default settings.

    The file gets a single ``[app]`` section holding the default input and
    output file names for both conversion directions, plus a ``test``
    option set to ``init``.

    @param filename: path of the INI file to write.
    """
    section_name = 'app'
    defaults = (
        ('newinputfile', 'newinput.txt'),
        ('oldoutputfile', 'oldoutput.txt'),
        ('oldinputfile', 'oldinput.txt'),
        ('newoutputfile', 'newoutput.txt'),
        ('test', 'init'),
    )

    config = ConfigParser.ConfigParser()
    config.add_section(section_name)
    for option, value in defaults:
        config.set(section_name, option, value)

    # Close the handle deterministically so the content is flushed to disk
    # before anything else (e.g. updateConfig) reads the file back.
    with open(filename, 'w') as config_file:
        config.write(config_file)

        

def updateConfig(filename, section, **keyv):
    """Change the value of existing options in one section of the config file.

    Only options that already exist in *section* are updated; any other
    keyword is ignored. The whole file is then rewritten.

    @param filename: path of the INI file to update.
    @param section: name of the section whose options are changed.
    @param keyv: ``option=value`` pairs to store.
    @raise IOError: if *filename* cannot be read.
    """
    config = ConfigParser.ConfigParser()

    # read() silently skips files it cannot open and returns the list of
    # files it did parse; fail loudly rather than create a new empty file.
    if not config.read(filename):
        raise IOError('cannot read config file: %s' % filename)

    for key, value in keyv.items():
        if config.has_option(section, key):
            config.set(section, key, value)

    # Open with 'w' so the file is truncated first: rewriting through 'r+'
    # leaves the tail of the old content behind whenever the new content is
    # shorter, which corrupts the file.
    with open(filename, 'w') as config_file:
        config.write(config_file)

def readConfig(filename, section, key):
    """Look up a single option in the configuration file.

    @param filename: path of the INI file to read.
    @param section: section that should contain the option.
    @param key: name of the option.
    @return: the option's value, or None when the section or option does
        not exist (a GBK-encoded error message is printed in that case).
    """
    parser = ConfigParser.ConfigParser()
    parser.read(filename)

    if not parser.has_option(section, key):
        print(u'配置文件错误，请初始化或修改配置文件。'.encode('gbk'))
        return None

    return parser.get(section, key)

# Name of the INI file that stores the input/output file names; it is a
# relative path, so it is resolved against the current working directory.
config_file_name = 'kingsoft-word-trans.ini'

# Phonetic symbol used by the new format -> character used by the old format.
new_to_old_dict = {
    u'æ': u'A',
    u'ɑ': u'B',
    u'ə': u'E',
    u'ʃ': u'F',
    u'ŋ': u'N',
    u'ʌ': u'Q',
    u'ɔ': u'R',
    u'ð': u'T',
    u'ʒ': u'V',
    u'θ': u'W',
    u'ɛ': u'Z',
    u'ˈ': u'5',
    u'ˌ': u'7',
    u'ː': u':',
    u'ɪ': u'I',
    u'ʊ': u'J',
    u'ɜ': u'\\',
    u'ɡ': u'g',
}

# Reverse table (old character -> new symbol), derived from the table above
# so the two directions always stay in step.
old_to_new_dict = dict(
    (old_char, new_char) for new_char, old_char in new_to_old_dict.items()
)
def phonetic_transfer(phonetic, new=True):
    """Translate the phonetic symbols in a string between the two notations.

    @param phonetic: text whose phonetic symbols should be translated.
    @param new: when true (the default) the text is mapped through
        new_to_old_dict; otherwise it is mapped through old_to_new_dict.
    @return: *phonetic* with every character that is a key of the selected
        table replaced by the corresponding value.
    """
    table = new_to_old_dict if new else old_to_new_dict
    for source_char, target_char in table.items():
        if source_char in phonetic:
            phonetic = phonetic.replace(source_char, target_char)
    return phonetic


def _configured_paths(input_key, output_key):
    """Return the (input, output) file names stored under two config options.

    @return: a 2-tuple of file names, or None when either option is missing
        (readConfig has already printed an error message in that case).
    """
    input_path = readConfig(config_file_name, 'app', input_key)
    if input_path is None:
        return None
    output_path = readConfig(config_file_name, 'app', output_key)
    if output_path is None:
        return None
    return input_path, output_path


def _write_entry(outputfile, wordline):
    """Write one converted entry; echo it to stdout if it cannot be encoded."""
    try:
        outputfile.write(wordline)
    except UnicodeEncodeError:
        # The output encoding cannot represent this entry. Show it to the
        # user and carry on instead of aborting the whole conversion.
        print(wordline.encode('utf-8'))
        print(wordline.encode('gb18030'))


def _convert_new_to_old(input_path, output_path):
    """Convert a new-format file (UTF-16) into an old-format file (GBK).

    In the input, a line starting with '+' opens an entry, lines starting
    with '#' follow it, a line starting with '&' carries the phonetic text
    and a line starting with '$' closes the entry. Any other line is
    appended to the current entry unchanged. Each entry is written as a
    single CRLF-terminated line of '#'-separated fields. The number of
    entries found is printed at the end.
    """
    entry_count = 0
    wordline = None          # None while no entry is open
    seen_hash_line = False   # True once the entry's first '#' line was added

    with codecs.open(input_path, 'r', 'utf-16') as inputfile:
        with codecs.open(output_path, 'w', 'gbk') as outputfile:
            for line in inputfile.readlines():
                marker = line[0:1]
                # Strip the line terminator itself instead of blindly
                # cutting two characters, so LF-only files and an
                # unterminated last line do not lose real text.
                text = line.rstrip('\r\n')

                if marker == '+':
                    entry_count += 1
                    wordline = text[1:]
                    seen_hash_line = False
                    continue

                if wordline is None:
                    # Stray content outside of any entry: nothing to add to.
                    continue

                if marker == '#':
                    if not seen_hash_line:
                        # Keep the '#': it separates the word from what follows.
                        wordline += text
                        seen_hash_line = True
                    else:
                        wordline += text[1:]
                elif marker == '&':
                    seen_hash_line = False
                    wordline += '#' + phonetic_transfer(text)[1:]
                elif marker == '$':
                    _write_entry(outputfile, wordline + '\r\n')
                    wordline = None
                else:
                    wordline += text

    print(entry_count)


def _convert_old_to_new(input_path, output_path):
    """Convert an old-format file (GBK) into a new-format file (UTF-16).

    Every non-blank input line is split on '#' into at most three fields,
    which are written as a '+' line, a '#' line and a '&' line (the third
    field passed through phonetic_transfer), followed by a '$3' line.
    Missing fields are treated as empty.
    """
    with codecs.open(input_path, 'r', 'gbk') as inputfile:
        with codecs.open(output_path, 'w', 'utf-16') as outputfile:
            for line in inputfile.readlines():
                if not line.strip():
                    continue
                fields = line.split('#', 2)
                # Pad so a line with fewer than three fields does not crash.
                fields += [''] * (3 - len(fields))
                word, definition, phonetic = fields
                # strip() below removes the line terminator, whatever its
                # form, so no fixed-width slicing is needed.
                wordline = ('+' + word.strip() + '\r\n'
                            + '#' + definition.strip() + '\r\n'
                            + '&' + phonetic_transfer(phonetic, False).strip() + '\r\n'
                            + '$3' + '\r\n')
                _write_entry(outputfile, wordline)


if __name__ == '__main__':
    # Interactive menu; all user-facing text is emitted GBK-encoded.
    while True:
        print(u"金山词霸生词本转换".encode('gbk'))
        print(u"1 老转新".encode('gbk'))
        print(u"2 新转老".encode('gbk'))
        print(u"9 初始化".encode('gbk'))
        print(u"h 帮助".encode('gbk'))
        print(u"0 退出".encode('gbk'))

        choice = raw_input(u'请选择：'.encode('gbk'))
        if choice == '0':
            break
        elif choice == '9':
            writeConfig(config_file_name)
            updateConfig(config_file_name, 'app', test = 'changed')
        elif choice == 'h':
            print(u'可修改kingsoft-word-trans.ini文件进行配置。'.encode('gbk'))
        elif choice == '2':
            paths = _configured_paths('newinputfile', 'oldoutputfile')
            if paths is not None:
                _convert_new_to_old(*paths)
        elif choice == '1':
            paths = _configured_paths('oldinputfile', 'newoutputfile')
            if paths is not None:
                _convert_old_to_new(*paths)
